This is a draft vignette, showing a proposed workflow for applying NICHES to the IPF Atlas.
This document uses a downsampled version of the 2019 IPF Atlas.
If we like overall approach, we could try scaling-up to the integrated nucSeq & cell data.
load("~/T5/Kaminski_Lab/control.down.Robj")
load("~/T5/Kaminski_Lab/ipf.down.Robj")
table(Idents(control.down))
##
## ncMonocyte Macrophage_Alveolar NK cMonocyte
## 1058 3000 2744 3000
## Lymphatic Macrophage Fibroblast Basal
## 973 3000 847 114
## Myofibroblast Multiplet cDC2 B
## 204 1000 1364 1227
## VE_Venous T ATII ATI
## 226 3000 2655 502
## VE_Capillary_A VE_Arterial Ciliated Club
## 192 274 1144 226
## VE_Capillary_B B_Plasma ILC_A T_Cytotoxic
## 436 232 276 3000
## Goblet T_Regulatory Mesothelial ILC_B
## 101 316 46 137
## SMC cDC1 DC_Langerhans Mast
## 69 199 18 165
## pDC DC_Mature VE_Peribronchial Pericyte
## 52 151 83 24
## PNEC Ionocyte
## 17 2
table(Idents(ipf.down))
##
## Macrophage Ciliated cMonocyte B
## 3000 3000 3000 3000
## Club Goblet Multiplet Macrophage_Alveolar
## 1855 1044 2765 3000
## T ncMonocyte Mast Fibroblast
## 3000 1931 572 1443
## T_Cytotoxic cDC2 Basal ATII
## 3000 3000 1472 496
## T_Regulatory VE_Peribronchial Aberrant_Basaloid cDC1
## 844 1919 448 373
## NK ILC_B Myofibroblast VE_Venous
## 3000 169 2886 430
## DC_Langerhans pDC B_Plasma VE_Capillary_B
## 282 430 192 494
## VE_Arterial Lymphatic DC_Mature VE_Capillary_A
## 330 733 364 206
## ATI Ionocyte ILC_A SMC
## 176 23 230 556
## Mesothelial Pericyte PNEC
## 233 578 12
merge <- merge(control.down,ipf.down)
#table(merge$Subject_Identity)
table(merge$Disease_Identity,merge$Subject_Identity)
##
## 001C 002C 003C 010I 021I 022I 025I 034C 034I 040I 041I 051I 053I 063I
## Control 789 148 1314 0 0 0 0 147 0 0 0 0 0 0
## IPF 0 0 0 1676 1937 2384 705 0 2514 1533 1137 2092 1099 898
##
## 065C 081C 084C 092C 098C 123I 133C 135I 1372C 137C 138I 145I 157I
## Control 465 161 151 656 3844 0 3283 0 1814 329 0 0 0
## IPF 0 0 0 0 0 694 0 1968 0 0 1527 1693 2509
##
## 158I 160C 166I 174I 177I 179I 192C 208C 209I 210I 212I 214I 218C 221I
## Control 0 980 0 0 0 0 1093 406 0 0 0 0 1172 0
## IPF 2127 0 1447 2420 2141 1360 0 0 1312 748 1481 244 0 1601
##
## 222C 222I 225I 226C 228I 244C 253C 296C 29I 388C 396C 439C 454C 465C
## Control 4098 0 0 1951 0 138 468 1308 0 668 409 3216 621 1432
## IPF 0 3166 3851 0 935 0 0 0 327 0 0 0 0 0
##
## 47I 483C 484C 49I 59I
## Control 0 483 530 0 0
## IPF 622 0 0 1669 669
#table(merge$Manuscript_Identity) == table(Idents(merge))
Defining a color scheme for plotting
# Define colors
col.pal <- list()
col.pal$Disease <- c('orange','purple3')
col.pal$Disease <- c(brewer.pal(6,'Accent')[5:6])
col.pal$Disease <- c('darkorange','blue3')
names(col.pal$Disease) <- c('Control','IPF')
col.pal$Class <- c(brewer.pal(4,'Set1'))
names(col.pal$Class) <- c('Epithelium','Endothelium','Mesenchyme','Immune')
col.pal$Type <- c('firebrick','steelblue','springgreen','purple','salmon','skyblue','midnightblue','orangered','violetred',
'tomato','seashell','sandybrown','saddlebrown','royalblue','plum4',
'lightgoldenrod','lawngreen','forestgreen','dimgray','deeppink',
'red2','paleturquoise1','palevioletred','orchid4','seagreen2','plum1','olivedrab2',
'slateblue','mediumvioletred','sienna','orange','seagreen',
'lightseagreen','mediumpurple4','dodgerblue','goldenrod1','grey30','darkred')
names(col.pal$Type) <- c("Aberrant_Basaloid","ATI","ATII","Basal" ,"Ciliated","Club", "Goblet","Ionocyte","PNEC",
"B","B_Plasma","cDC1","cDC2","cMonocyte","DC_Langerhans","DC_Mature","ILC_A","ILC_B",
"Macrophage","Macrophage_Alveolar","Mast","ncMonocyte","NK","pDC","T","T_Cytotoxic","T_Regulatory",
"Fibroblast","Mesothelial","Myofibroblast","Pericyte","SMC",
"Lymphatic","VE_Arterial","VE_Capillary_A","VE_Capillary_B","VE_Peribronchial","VE_Venous")
Visualizing the starting atlas:
# Ipf cell atlas
merge <- subset(merge, idents='Multiplet',invert=T)
merge <- ScaleData(merge)
merge <- FindVariableFeatures(merge)
merge <- RunPCA(merge)
#ElbowPlot(merge)
merge <- RunUMAP(merge,dims = 1:50)
merge$Manuscript_Identity <- factor(merge$Manuscript_Identity,levels = c("Aberrant_Basaloid","ATI","ATII","Basal" ,"Ciliated","Club", "Goblet","Ionocyte","PNEC",
"B","B_Plasma","cDC1","cDC2","cMonocyte","DC_Langerhans","DC_Mature","ILC_A","ILC_B",
"Macrophage","Macrophage_Alveolar","Mast","ncMonocyte","NK","pDC","T","T_Cytotoxic","T_Regulatory",
"Fibroblast","Mesothelial","Myofibroblast","Pericyte","SMC",
"Lymphatic","VE_Arterial","VE_Capillary_A","VE_Capillary_B","VE_Peribronchial","VE_Venous"))
DimPlot(merge,group.by = 'Manuscript_Identity',cols = col.pal$Type,pt.size = 1,shuffle = T)+ggtitle('IPF Cell Atlas')+
theme(plot.title=element_text(hjust=0))+
theme(plot.title = element_text(size = 30, face = "bold"))+
DarkTheme()+
guides(color=guide_legend(ncol =1,override.aes = list(size=5)))+
NoAxes()
Now we are ready to compute the NICHES information and begin investigating:
split <- SplitObject(merge,split.by = 'Subject_Identity')
options(warn = 1)
for(i in 1:length(split)){
split[[i]] <- RunALRA(split[[i]])
gc()
}
# Save imputed data
gc()
save(split,file='control.ipf.split.imputed.Robj')
load('control.ipf.split.imputed.Robj')
names(table(Idents(merge)))
for(i in 1:length(split)){
Idents(split[[i]]) <- split[[i]]$Manuscript_Identity
split[[i]] <- RenameIdents(split[[i]],c("Aberrant_Basaloid"='Epithelium',
"ATI"='Epithelium',
"ATII"='Epithelium',
"B"='Immune',
"B_Plasma"='Immune',
"Basal"='Epithelium' ,
"cDC1"='Immune',
"cDC2"='Immune',
"Ciliated"='Epithelium',
"Club"='Epithelium',
"cMonocyte"='Immune',
"DC_Langerhans"='Immune',
"DC_Mature"='Immune',
"Fibroblast"='Mesenchyme',
"Goblet"='Epithelium',
"ILC_A"='Immune',
"ILC_B"='Immune' ,
"Ionocyte"='Epithelium',
"Lymphatic"='Endothelium',
"Macrophage"='Immune' ,
"Macrophage_Alveolar"='Immune',
"Mast"='Immune' ,
"Mesothelial"='Mesenchyme' ,
"Myofibroblast"='Mesenchyme',
"ncMonocyte"='Immune',
"NK"='Immune',
"pDC"='Immune',
"Pericyte"='Mesenchyme' ,
"PNEC"='Epithelium',
"SMC"='Mesenchyme',
"T"='Immune',
"T_Cytotoxic"='Immune',
"T_Regulatory"='Immune',
"VE_Arterial"='Endothelium' ,
"VE_Capillary_A"='Endothelium' ,
"VE_Capillary_B"='Endothelium' ,
"VE_Peribronchial"='Endothelium' ,
"VE_Venous"='Endothelium'))
split[[i]]$CellClass <- Idents(split[[i]])
Idents(split[[i]]) <- split[[i]]$Manuscript_Identity
}
names(split[[i]]@meta.data)
We compute SystemToCell, CellToSystem, and CellToCell independently. We ask NICHES to use the ALRA-imputed data slot in this instance, and to map against the FANTOM5 database of known logand-receptor interactions. We tell NICHES to sample all cell type crosses based on the “Manuscript_Identity” slot.
scc.list <- list()
for(i in 1:length(split)){
print(i)
scc.list[[i]] <- RunNICHES(split[[i]],
LR.database="fantom5",
species="human",
assay="alra",
cell_types = "Manuscript_Identity",
meta.data.to.map = names(split[[i]]@meta.data),
SystemToCell = T,
CellToCell = T,
CellToSystem=T)
}
names(scc.list) <- names(split)
temp.list <- list()
for(i in 1:length(scc.list)){
temp.list[[i]] <- scc.list[[i]]$CellToCell # Isolate CellToCell Signaling
}
cell.to.cell <- merge(temp.list[[1]],temp.list[2:length(temp.list)])
cell.to.cell
temp.list <- list()
for(i in 1:length(scc.list)){
temp.list[[i]] <- scc.list[[i]]$SystemToCell # Isolate SystemToCell Signaling
}
system.to.cell <- merge(temp.list[[1]],temp.list[2:length(temp.list)])
system.to.cell
temp.list <- list()
for(i in 1:length(scc.list)){
temp.list[[i]] <- scc.list[[i]]$CellToSystem # Isolate CellToSystem Signaling
}
cell.to.system <- merge(temp.list[[1]],temp.list[2:length(temp.list)])
cell.to.system
save(cell.to.cell,file='cell.to.cell.Robj')
Let’s load the CellToCell data output type for further analysis in this vignette, as it tends to be the easiest to interpret:
load('cell.to.cell.Robj')
cell.to.cell
## An object of class Seurat
## 2346 features across 1071580 samples within 1 assay
## Active assay: CellToCell (2346 features, 0 variable features)
This step is essential to getting clean embeddings, clusterings, and downstream data analysis in later steps
#table(Idents(cell.to.cell))
#table(Idents(cell.to.cell),cell.to.cell$Subject_Identity.Joint)
VlnPlot(cell.to.cell,
features = 'nFeature_CellToCell',
group.by = 'Disease_Identity.Joint',
pt.size=0,log = T)
VlnPlot(cell.to.cell,
features = 'nFeature_CellToCell',
group.by = 'Subject_Identity.Joint',
pt.size=0,log = T)+NoLegend()
From the above, let’s set a cutoff saying tht we only want to consider cell-cell crosses with at least 100 unique features (ligand-receptor mechanisms). We are also not interested in Multiplets as either a sending or a receiving cell. Saving this as a temporary file so can load quickly later.
#table(cell.to.cell$SendingType)
cell.to.cell <- subset(cell.to.cell,subset=SendingType=='Multiplet',invert=T)
#table(cell.to.cell$SendingType)
cell.to.cell <- subset(cell.to.cell,subset=ReceivingType=='Multiplet',invert=T)
#table(cell.to.cell$ReceivingType)
temp <- subset(cell.to.cell,nFeature_CellToCell>100) # Choose this limit based on the above violin plots or similar
temp
save(temp,file = 'ipf.niches.temp.Robj')
gc()
load('ipf.niches.temp.Robj')
VlnPlot(temp,
features = 'nFeature_CellToCell',
group.by = 'Disease_Identity.Joint',
pt.size=0,log = T)
# VlnPlot(temp,
# features = 'nFeature_CellToCell',
# group.by = 'Subject_Identity.Joint',
# pt.size=0,log = T)+NoLegend()
Now that the data is cleaned (limited to high-information crosses) we are ready to embed, cluster, and visualize our NICHES data so that we may identify data-set relevant patterns of interest:
Perform PCA
temp <- ScaleData(temp)
temp <- FindVariableFeatures(temp)
temp <- RunPCA(temp,npcs = 100)
ElbowPlot(temp,ndim=100)
PCHeatmap(temp,balanced=T,cells=200,dims=1:9)
PCHeatmap(temp,balanced=T,cells=200,dims=10:18)
PCHeatmap(temp,balanced=T,cells=200,dims=19:27)
PCHeatmap(temp,balanced=T,cells=200,dims=28:36)
temp <- RunUMAP(temp,dims = 1:30)
DimPlot(temp,raster = FALSE)+NoLegend()
Looks reasonable. Let’s look at how the metadata maps within this embedding:
# Total connectomics
p1 <- DimPlot(temp,group.by = 'Disease_Identity.Sending',shuffle = T,raster = F,cols = col.pal$Disease)+NoAxes()+ggtitle('Disease Identity')
p2 <- DimPlot(temp,group.by = 'Subject_Identity.Sending',shuffle = T,raster = F)+NoLegend()+NoAxes()+ggtitle('Subject Identity')
p3 <- DimPlot(temp,group.by = 'CellClass.Sending',shuffle = T,label = F,raster = F,cols = col.pal$Class)+NoAxes()+ggtitle('Sending Cell Class')
p4 <- DimPlot(temp,group.by = 'CellClass.Receiving',shuffle = T,label = F,raster = F,cols = col.pal$Class)+NoAxes()+ggtitle('Receiving Cell Class')
cowplot::plot_grid(p1,p2,p3,p4,align = T)
This looks good. Let’s break this down into individual class-class crosses and look for cell-cell signaling archetypes that are either enriched or lost in IPF vs. Control.
epi.mes <- subset(temp,subset=CellClass.Sending=='Epithelium')
epi.mes <- subset(epi.mes,subset=CellClass.Receiving=='Mesenchyme')
epi.mes <- subset(epi.mes,subset=CellClass.Sending!='Multiplet') # Just a precaution
table(epi.mes$CellClass.Joint)
##
## Epithelium - Mesenchyme
## 6142
epi.mes <- ScaleData(epi.mes)
epi.mes <- FindVariableFeatures(epi.mes)
epi.mes <- RunPCA(epi.mes)
ElbowPlot(epi.mes,ndims = 50)
epi.mes <- RunUMAP(epi.mes,dims = 1:10)
epi.mes <- FindNeighbors(epi.mes,dims = 1:10)
epi.mes <- FindClusters(epi.mes,resolution = 0.6)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6142
## Number of edges: 186998
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8185
## Number of communities: 10
## Elapsed time: 0 seconds
p1 <- DimPlot(epi.mes,group.by = 'Disease_Identity.Sending',shuffle = T)
p2 <- DimPlot(epi.mes,shuffle = T)+ggplot2::ggtitle('Signaling Archetype')
p3 <- DimPlot(epi.mes,group.by = 'Manuscript_Identity.Sending',shuffle = T)
p4 <- DimPlot(epi.mes,group.by = 'Manuscript_Identity.Receiving',shuffle = T)
cowplot::plot_grid(p1,p2,p3,p4)
It looks like cluster 7 is enriched in IPF and is associated with aberrant basaloid cells. Let’s see some mechanisms that are associated with this signaling archetype:
mark.epi.mes <- FindMarkers(epi.mes,ident.1 = '7',only.pos = T)
mark.epi.mes$ratio <- mark.epi.mes$pct.1/mark.epi.mes$pct.2
data.table::setorderv(mark.epi.mes,'ratio',order = -1)
knitr::kable(mark.epi.mes[1:20,])
| p_val | avg_log2FC | pct.1 | pct.2 | p_val_adj | ratio | |
|---|---|---|---|---|---|---|
| IL11—IL11RA | 0 | 0.5870075 | 0.250 | 0.015 | 0 | 16.666667 |
| FGF2—FGFR4 | 0 | 0.2612598 | 0.102 | 0.007 | 0 | 14.571429 |
| CGB8—SIRPA | 0 | 0.3850679 | 0.145 | 0.010 | 0 | 14.500000 |
| PDGFB—PDGFRB | 0 | 1.0384244 | 0.375 | 0.029 | 0 | 12.931035 |
| PDGFB—LRP1 | 0 | 1.3886025 | 0.398 | 0.031 | 0 | 12.838710 |
| IGF2—IGF2R | 0 | 1.3062871 | 0.385 | 0.030 | 0 | 12.833333 |
| EFNA4—EPHA7 | 0 | 0.3057615 | 0.140 | 0.012 | 0 | 11.666667 |
| CXCL12—ACKR3 | 0 | 0.3264253 | 0.105 | 0.009 | 0 | 11.666667 |
| SEMA7A—PLXNC1 | 0 | 0.6341920 | 0.173 | 0.015 | 0 | 11.533333 |
| FGF2—NRP1 | 0 | 1.3083440 | 0.355 | 0.031 | 0 | 11.451613 |
| LTB—TNFRSF1A | 0 | 2.4686443 | 0.355 | 0.032 | 0 | 11.093750 |
| PDGFB—ITGAV | 0 | 0.8274260 | 0.281 | 0.026 | 0 | 10.807692 |
| LTB—LTBR | 0 | 1.4872515 | 0.214 | 0.020 | 0 | 10.700000 |
| LTB—CD40 | 0 | 0.7005923 | 0.107 | 0.010 | 0 | 10.700000 |
| SEMA7A—ITGA1 | 0 | 1.8054061 | 0.556 | 0.052 | 0 | 10.692308 |
| IGF2—INSR | 0 | 0.8759508 | 0.212 | 0.020 | 0 | 10.600000 |
| IGF2—IGF1R | 0 | 1.4794769 | 0.327 | 0.031 | 0 | 10.548387 |
| FGF2—FGFR1 | 0 | 1.6137264 | 0.429 | 0.043 | 0 | 9.976744 |
| SEMA7A—ITGB1 | 0 | 2.0642711 | 0.793 | 0.081 | 0 | 9.790123 |
| FGF2—SDC2 | 0 | 1.1186209 | 0.319 | 0.034 | 0 | 9.382353 |
We can visualize chosen markers within the embedding if we choose, as follows:
p5 <- FeaturePlot(epi.mes,'WNT7A—LRP6',order = T,pt.size = 1,max.cutoff = 6)
p6 <- FeaturePlot(epi.mes,'PDGFB—PDGFRB',order = T,pt.size = 1,max.cutoff = 6)
cowplot::plot_grid(p5,p6,nrow=1)
Putting this all together as a small multiple with nice colors:
p1 <- DimPlot(epi.mes,raster = F,pt.size = 1)+ggplot2::ggtitle('Signaling Archetype')+NoAxes()+ theme(legend.position="top")+guides(color=guide_legend(nrow=2,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p2 <- DimPlot(epi.mes,group.by = 'Disease_Identity.Sending',raster = F,shuffle = T,pt.size = 1,cols = col.pal$Disease)+NoAxes()+ggtitle('Disease Identity')+ theme(legend.position="top")+guides(color=guide_legend(nrow=2,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p3 <- DimPlot(epi.mes,group.by = 'Manuscript_Identity.Sending',raster = F,pt.size = 1,cols = col.pal$Type)+NoAxes()+ggtitle('Sending Cell Type')+ theme(legend.position="top")+guides(color=guide_legend(nrow=3,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p4 <- DimPlot(epi.mes,group.by = 'Manuscript_Identity.Receiving',raster = F,pt.size = 1,cols = col.pal$Type)+NoAxes()+ggtitle('Receiving Cell Type')+ theme(legend.position="top")+guides(color=guide_legend(nrow=3,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p5 <- FeaturePlot(epi.mes,'WNT7A—LRP6',order = T,pt.size = 1,max.cutoff = 6,raster = F)+NoAxes()+ theme(legend.position="top")+theme(plot.title=element_text(hjust=0))
p6 <- FeaturePlot(epi.mes,'PDGFB—PDGFRB',order = T,pt.size = 1,max.cutoff = 6,raster = F)+NoAxes()+ theme(legend.position="top")+theme(plot.title=element_text(hjust=0))
cowplot::plot_grid(p1,p2,p3,p4,p5,p6,nrow = 1,align = T)
Alternatively, we can create a complex heatmap which shows a great deal of information at once, something like as follows (this uses a custom build ‘ArchetypeHeatmap’ function which is a NICHES-generalized wrapper for ComplexHeatmap):
ArchetypeHeatmap(epi.mes,
primary = 'seurat_clusters' ,
secondary = 'Manuscript_Identity.Sending' ,
tertiary = 'Manuscript_Identity.Receiving' ,
quarternary = 'Disease_Identity.Sending' ,
primary.cols = NULL,
secondary.cols = col.pal$Type, # Need to be a named list of colors
tertiary.cols = col.pal$Type,
quarternary.cols = col.pal$Disease,
save.markers = T,
return.markers = T,
MOI.by = 'ratio',
labels = c('Signaling Archetype','Sending Cell Type','Receiving Cell Type','Condition'),
pt.size = 0.5,
return.plot = F,
print.plot = F,
filetag = 'IPF_NICHE_Epi.Mes',
exclude = 'ITGA|ITGB|LRP1')
### We can save our work for later as follows:
# save(epi.mes,file = 'epi.mes.Robj')
# save(mark.epi.mes,file = 'mark.epi.mes.Robj')
Now that we have a standardized workflow, let’s turn our attention to the other (15) class-class crosses, each in turn:
epi.end <- subset(temp,subset=CellClass.Sending=='Epithelium')
epi.end <- subset(epi.end,subset=CellClass.Receiving=='Endothelium')
epi.end <- subset(epi.end,subset=CellClass.Sending!='Multiplet')
table(epi.end$CellClass.Joint)
##
## Epithelium - Endothelium
## 5276
epi.end <- ScaleData(epi.end)
epi.end <- FindVariableFeatures(epi.end)
epi.end <- RunPCA(epi.end)
ElbowPlot(epi.end,ndims = 50)
epi.end <- RunUMAP(epi.end,dims = 1:8)
epi.end <- FindNeighbors(epi.end,dims = 1:8)
epi.end <- FindClusters(epi.end,resolution = 0.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5276
## Number of edges: 158742
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8360
## Number of communities: 8
## Elapsed time: 0 seconds
p1 <- DimPlot(epi.end,group.by = 'Disease_Identity.Sending')
p2 <- DimPlot(epi.end)+ggplot2::ggtitle('Signaling Archetype')
p3 <- DimPlot(epi.end,group.by = 'Manuscript_Identity.Sending')
p4 <- DimPlot(epi.end,group.by = 'Manuscript_Identity.Receiving')
cowplot::plot_grid(p1,p2,p3,p4)
It looks like cluster 4 is enriched in IPF. Let’s investigate further:
mark.epi.end <- FindMarkers(epi.end,ident.1 = '4',only.pos = T)
mark.epi.end$ratio <- mark.epi.end$pct.1/mark.epi.end$pct.2
data.table::setorderv(mark.epi.end,'ratio',order = -1)
knitr::kable(mark.epi.end[1:20,])
| p_val | avg_log2FC | pct.1 | pct.2 | p_val_adj | ratio | |
|---|---|---|---|---|---|---|
| CSF2—CSF2RB | 0 | 0.7381961 | 0.141 | 0.016 | 0 | 8.812500 |
| LTB—CD40 | 0 | 1.1563510 | 0.113 | 0.013 | 0 | 8.692308 |
| TNFSF15—TNFRSF25 | 0 | 0.2817680 | 0.100 | 0.012 | 0 | 8.333333 |
| MFGE8—PDGFRB | 0 | 0.5416215 | 0.130 | 0.019 | 0 | 6.842105 |
| DLL4—NOTCH2 | 0 | 0.2708609 | 0.126 | 0.019 | 0 | 6.631579 |
| LTB—LTBR | 0 | 1.4476810 | 0.142 | 0.022 | 0 | 6.454546 |
| TNF—LTBR | 0 | 0.4129545 | 0.106 | 0.017 | 0 | 6.235294 |
| FGF1—EGFR | 0 | 0.4543708 | 0.187 | 0.030 | 0 | 6.233333 |
| COL4A1—ITGA2 | 0 | 0.9069791 | 0.180 | 0.029 | 0 | 6.206897 |
| COL6A2—ITGA2 | 0 | 0.9548353 | 0.138 | 0.023 | 0 | 6.000000 |
| BMP2—BMPR1A | 0 | 0.5115480 | 0.162 | 0.027 | 0 | 6.000000 |
| IGF2—IGF2R | 0 | 0.8470069 | 0.177 | 0.030 | 0 | 5.900000 |
| PLAU—ITGB2 | 0 | 0.7596012 | 0.128 | 0.022 | 0 | 5.818182 |
| EFNB2—RHBDL2 | 0 | 0.7260124 | 0.248 | 0.043 | 0 | 5.767442 |
| COL5A1—SDC3 | 0 | 0.3803499 | 0.119 | 0.021 | 0 | 5.666667 |
| FGF2—FGFR1 | 0 | 0.7229520 | 0.158 | 0.028 | 0 | 5.642857 |
| BMP2—BMPR1B | 0 | 0.4650717 | 0.128 | 0.023 | 0 | 5.565217 |
| IGF2—INSR | 0 | 1.4002259 | 0.165 | 0.030 | 0 | 5.500000 |
| FGF1—CD44 | 0 | 0.5280894 | 0.141 | 0.026 | 0 | 5.423077 |
| VCAN—EGFR | 0 | 1.5668954 | 0.241 | 0.045 | 0 | 5.355556 |
Picking a few markers of interest:
p5 <- FeaturePlot(epi.end,'FGF2—FGFR1',order = T,pt.size = 1,max.cutoff = 6)
p6 <- FeaturePlot(epi.end,'BMP5—ACVR2A',order = T,pt.size = 1,max.cutoff = 4)
cowplot::plot_grid(p5,p6)
We can then put all together:
p1 <- DimPlot(epi.end,raster = F,pt.size = 1)+ggplot2::ggtitle('Signaling Archetype')+NoAxes()+ theme(legend.position="top")+guides(color=guide_legend(nrow=2,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p2 <- DimPlot(epi.end,group.by = 'Disease_Identity.Sending',raster = F,shuffle = T,pt.size = 1,cols = col.pal$Disease)+NoAxes()+ggtitle('Disease Identity')+ theme(legend.position="top")+guides(color=guide_legend(nrow=2,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p3 <- DimPlot(epi.end,group.by = 'Manuscript_Identity.Sending',raster = F,pt.size = 1,cols = col.pal$Type)+NoAxes()+ggtitle('Sending Cell Type')+ theme(legend.position="top")+guides(color=guide_legend(nrow=3,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p4 <- DimPlot(epi.end,group.by = 'Manuscript_Identity.Receiving',raster = F,pt.size = 1,cols = col.pal$Type)+NoAxes()+ggtitle('Receiving Cell Type')+ theme(legend.position="top")+guides(color=guide_legend(nrow=3,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p5 <- FeaturePlot(epi.end,'FGF2—FGFR1',order = T,pt.size = 1,max.cutoff = 6,raster = F)+NoAxes()+ theme(legend.position="top")+theme(plot.title=element_text(hjust=0))
p6 <- FeaturePlot(epi.end,'BMP5—ACVR2A',order = T,pt.size = 1,max.cutoff = 6,raster = F)+NoAxes()+ theme(legend.position="top")+theme(plot.title=element_text(hjust=0))
cowplot::plot_grid(p1,p2,p3,p4,p5,p6,nrow = 1,align = T)
Saving our work for later:
save(mark.epi.end,file = 'mark.epi.end.Robj')
save(epi.end,file = 'epi.end.Robj')
Epithelial autocrine signalinqg:
epi.epi <- subset(temp,subset=CellClass.Sending=='Epithelium')
epi.epi <- subset(epi.epi,subset=CellClass.Receiving=='Epithelium')
epi.epi <- subset(epi.epi,subset=CellClass.Sending!='Multiplet')
table(epi.epi$CellClass.Joint)
##
## Epithelium - Epithelium
## 18436
epi.epi <- ScaleData(epi.epi)
epi.epi <- FindVariableFeatures(epi.epi)
epi.epi <- RunPCA(epi.epi)
ElbowPlot(epi.epi,ndims = 50)
epi.epi <- RunUMAP(epi.epi,dims = 1:8)
epi.epi <- FindNeighbors(epi.epi,dims = 1:8)
epi.epi <- FindClusters(epi.epi,resolution = 0.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 18436
## Number of edges: 544690
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8841
## Number of communities: 9
## Elapsed time: 2 seconds
p1 <- DimPlot(epi.epi,group.by = 'Disease_Identity.Sending')
p2 <- DimPlot(epi.epi)+ggplot2::ggtitle('Signaling Archetype')
p3 <- DimPlot(epi.epi,group.by = 'Manuscript_Identity.Sending')
p4 <- DimPlot(epi.epi,group.by = 'Manuscript_Identity.Receiving')
cowplot::plot_grid(p1,p2,p3,p4)
Epi-epi autocrine marker mechanisms specific to IPF / aberrant basaloids (cluster 2):
mark.epi.epi <- FindMarkers(epi.epi,ident.1 = '2',only.pos = T)
mark.epi.epi$ratio <- mark.epi.epi$pct.1/mark.epi.epi$pct.2
data.table::setorderv(mark.epi.epi,'ratio',order = -1)
knitr::kable(mark.epi.epi[1:20,])
| p_val | avg_log2FC | pct.1 | pct.2 | p_val_adj | ratio | |
|---|---|---|---|---|---|---|
| IL23A—IL12RB1 | 0 | 0.3158816 | 0.108 | 0.006 | 0 | 18.000000 |
| COL5A1—SDC3 | 0 | 0.4062673 | 0.118 | 0.007 | 0 | 16.857143 |
| SEMA7A—ITGA1 | 0 | 0.5935414 | 0.166 | 0.010 | 0 | 16.600000 |
| EFNB1—EPHB6 | 0 | 0.4124380 | 0.178 | 0.012 | 0 | 14.833333 |
| EFNB1—EPHB3 | 0 | 0.4473471 | 0.207 | 0.014 | 0 | 14.785714 |
| FGF2—FGFR3 | 0 | 0.3041765 | 0.118 | 0.008 | 0 | 14.750000 |
| COL6A2—ITGA1 | 0 | 1.1209605 | 0.180 | 0.013 | 0 | 13.846154 |
| FGF2—NRP1 | 0 | 0.4402414 | 0.122 | 0.009 | 0 | 13.555556 |
| MFAP2—NOTCH1 | 0 | 0.4014025 | 0.149 | 0.011 | 0 | 13.545454 |
| PLAU—ITGA5 | 0 | 0.4948389 | 0.106 | 0.008 | 0 | 13.250000 |
| EFNA4—EPHA1 | 0 | 0.3796532 | 0.196 | 0.015 | 0 | 13.066667 |
| TNF—TNFRSF21 | 0 | 0.6304780 | 0.102 | 0.009 | 0 | 11.333333 |
| NOV—PLXNA1 | 0 | 0.3509425 | 0.101 | 0.009 | 0 | 11.222222 |
| PGF—NRP1 | 0 | 0.5190593 | 0.140 | 0.013 | 0 | 10.769231 |
| FGF2—SDC1 | 0 | 0.9638212 | 0.198 | 0.019 | 0 | 10.421053 |
| COL5A1—ITGA1 | 0 | 0.5932909 | 0.164 | 0.016 | 0 | 10.250000 |
| PDGFB—ITGAV | 0 | 0.9152901 | 0.150 | 0.015 | 0 | 10.000000 |
| EFNB1—EPHB4 | 0 | 0.5906749 | 0.249 | 0.025 | 0 | 9.960000 |
| COL18A1—KDR | 0 | 1.1758647 | 0.254 | 0.027 | 0 | 9.407407 |
| WNT7A—FZD5 | 0 | 0.4435647 | 0.176 | 0.019 | 0 | 9.263158 |
Finding a few autocrine mechanisms of interest that seen disease relevant:
p5 <- FeaturePlot(epi.epi,'WNT7A—FZD5',order = T,pt.size = 1,max.cutoff = 6)
p6 <- FeaturePlot(epi.epi,'NOV—NOTCH1',order = T,pt.size = 1,max.cutoff = 6)
cowplot::plot_grid(p5,p6)
Epi-epi small multiple:
p1 <- DimPlot(epi.epi,raster = F,pt.size = 1)+ggplot2::ggtitle('Signaling Archetype')+NoAxes()+ theme(legend.position="top")+guides(color=guide_legend(nrow=2,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p2 <- DimPlot(epi.epi,group.by = 'Disease_Identity.Sending',raster = F,shuffle = T,pt.size = 1,cols = col.pal$Disease)+NoAxes()+ggtitle('Disease Identity')+ theme(legend.position="top")+guides(color=guide_legend(nrow=2,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p3 <- DimPlot(epi.epi,group.by = 'Manuscript_Identity.Sending',raster = F,pt.size = 1,cols = col.pal$Type)+NoAxes()+ggtitle('Sending Cell Type')+ theme(legend.position="top")+guides(color=guide_legend(nrow=3,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p4 <- DimPlot(epi.epi,group.by = 'Manuscript_Identity.Receiving',raster = F,pt.size = 1,cols = col.pal$Type)+NoAxes()+ggtitle('Receiving Cell Type')+ theme(legend.position="top")+guides(color=guide_legend(nrow=3,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p5 <- FeaturePlot(epi.epi,'WNT7A—FZD5',order = T,pt.size = 1,max.cutoff = 6,raster = F)+NoAxes()+ theme(legend.position="top")+theme(plot.title=element_text(hjust=0))
p6 <- FeaturePlot(epi.epi,'NOV—NOTCH1',order = T,pt.size = 1,max.cutoff = 6,raster = F)+NoAxes()+ theme(legend.position="top")+theme(plot.title=element_text(hjust=0))
cowplot::plot_grid(p1,p2,p3,p4,p5,p6,nrow = 1,align = T)
Saving for later:
save(mark.epi.epi,file = 'mark.epi.epi.Robj')
save(epi.epi,file = 'epi.epi.Robj')
Now let’s look at epithelium-immune signaling specifically:
epi.imm <- subset(temp,subset=CellClass.Sending=='Epithelium')
epi.imm <- subset(epi.imm,subset=CellClass.Receiving=='Immune')
epi.imm <- subset(epi.imm,subset=CellClass.Sending!='Multiplet')
table(epi.imm$CellClass.Joint)
##
## Epithelium - Immune
## 17904
epi.imm <- ScaleData(epi.imm)
epi.imm <- FindVariableFeatures(epi.imm)
epi.imm <- RunPCA(epi.imm)
ElbowPlot(epi.imm,ndims = 50)
epi.imm <- RunUMAP(epi.imm,dims = 1:30)
epi.imm <- FindNeighbors(epi.imm,dims = 1:30)
epi.imm <- FindClusters(epi.imm,resolution = 0.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 17904
## Number of edges: 616654
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8788
## Number of communities: 15
## Elapsed time: 2 seconds
p1 <- DimPlot(epi.imm,group.by = 'Disease_Identity.Sending')
p2 <- DimPlot(epi.imm)+ggplot2::ggtitle('Signaling Archetype')
p3 <- DimPlot(epi.imm,group.by = 'Manuscript_Identity.Sending')
p4 <- DimPlot(epi.imm,group.by = 'Manuscript_Identity.Receiving')
cowplot::plot_grid(p1,p2,p3,p4)
Let’s find some marker for cluster 5 which is driven predominantly by aberrant basaloids:
mark.epi.imm <- FindMarkers(epi.imm,ident.1 = '5',only.pos = T)
mark.epi.imm$ratio <- mark.epi.imm$pct.1/mark.epi.imm$pct.2
data.table::setorderv(mark.epi.imm,'ratio',order = -1)
knitr::kable(mark.epi.imm[1:20,])
| p_val | avg_log2FC | pct.1 | pct.2 | p_val_adj | ratio | |
|---|---|---|---|---|---|---|
| CGB8—SIRPA | 0 | 1.4009639 | 0.316 | 0.023 | 0 | 13.739130 |
| LTB—CD40 | 0 | 1.2700641 | 0.144 | 0.013 | 0 | 11.076923 |
| PDGFB—LRP1 | 0 | 0.4654225 | 0.166 | 0.015 | 0 | 11.066667 |
| PDGFB—ITGAV | 0 | 0.5504246 | 0.231 | 0.021 | 0 | 11.000000 |
| LTB—LTBR | 0 | 1.1479867 | 0.141 | 0.014 | 0 | 10.071429 |
| LTB—TNFRSF1A | 0 | 2.0525071 | 0.281 | 0.028 | 0 | 10.035714 |
| IGF2—INSR | 0 | 1.0476898 | 0.282 | 0.032 | 0 | 8.812500 |
| SEMA7A—PLXNC1 | 0 | 1.0434091 | 0.358 | 0.045 | 0 | 7.955556 |
| TNF—TNFRSF1A | 0 | 0.7465077 | 0.224 | 0.029 | 0 | 7.724138 |
| TNF—LTBR | 0 | 0.2675594 | 0.100 | 0.013 | 0 | 7.692308 |
| COL6A2—ITGB1 | 0 | 2.3063884 | 0.760 | 0.102 | 0 | 7.450980 |
| SEMA7A—ITGB1 | 0 | 1.6172146 | 0.682 | 0.092 | 0 | 7.413043 |
| FGF2—SDC2 | 0 | 1.0503114 | 0.229 | 0.031 | 0 | 7.387097 |
| CXCL12—ITGB1 | 0 | 0.4788986 | 0.216 | 0.030 | 0 | 7.200000 |
| FGF2—SDC4 | 0 | 0.4202222 | 0.144 | 0.020 | 0 | 7.200000 |
| CXCL12—CD4 | 0 | 0.3060225 | 0.128 | 0.018 | 0 | 7.111111 |
| IGF2—IGF2R | 0 | 1.2964819 | 0.415 | 0.059 | 0 | 7.033898 |
| SERPINE1—ITGAV | 0 | 2.2619087 | 0.365 | 0.052 | 0 | 7.019231 |
| IGFL1—IGFLR1 | 0 | 1.3580303 | 0.215 | 0.032 | 0 | 6.718750 |
| SERPINE1—ITGB5 | 0 | 1.2895377 | 0.141 | 0.021 | 0 | 6.714286 |
A couple mechanisms of interest (TNF-C and TNF-A!)
p5 <- FeaturePlot(epi.imm,'LTB—LTBR',order = T,pt.size = 1,max.cutoff = 30) #TNF-C
p6 <- FeaturePlot(epi.imm,'TNF—TNFRSF1A',order = T,pt.size = 1,max.cutoff = 15)
cowplot::plot_grid(p5,p6)
Small multiple:
# Epi-Imm
p1 <- DimPlot(epi.imm,raster = F,pt.size = 1)+ggplot2::ggtitle('Signaling Archetype')+NoAxes()+ theme(legend.position="top")+guides(color=guide_legend(nrow=2,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p2 <- DimPlot(epi.imm,group.by = 'Disease_Identity.Sending',raster = F,shuffle = T,pt.size = 1,cols = col.pal$Disease)+NoAxes()+ggtitle('Disease Identity')+ theme(legend.position="top")+guides(color=guide_legend(nrow=2,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p3 <- DimPlot(epi.imm,group.by = 'Manuscript_Identity.Sending',raster = F,pt.size = 1,cols = col.pal$Type)+NoAxes()+ggtitle('Sending Cell Type')+ theme(legend.position="top")+guides(color=guide_legend(nrow=3,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p4 <- DimPlot(epi.imm,group.by = 'Manuscript_Identity.Receiving',raster = F,pt.size = 1,cols = col.pal$Type)+NoAxes()+ggtitle('Receiving Cell Type')+ theme(legend.position="top")+guides(color=guide_legend(nrow=4,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))+theme(legend.text = element_text(size=8))
p5 <- FeaturePlot(epi.imm,'LTB—LTBR',order = T,pt.size = 1,max.cutoff = 6,raster = F)+NoAxes()+ theme(legend.position="top")+theme(plot.title=element_text(hjust=0))
p6 <- FeaturePlot(epi.imm,'TNF—TNFRSF1A',order = T,pt.size = 1,max.cutoff = 6,raster = F)+NoAxes()+ theme(legend.position="top")+theme(plot.title=element_text(hjust=0))
cowplot::plot_grid(p1,p2,p3,p4,p5,p6,nrow = 1,align = T)
Saving our work for later:
save(mark.epi.imm,file = 'mark.epi.imm.Robj')
save(epi.imm,file = 'epi.imm.Robj')
We can also go fishing for specific markers. Here are two epithelial-associated cytokines that came up in talks at ATS. We can quickly query these within a given object and ask ‘who is listening’:
p1 <- DimPlot(epi.imm,raster = F,pt.size = 1)+ggplot2::ggtitle('Signaling Archetype')+NoAxes()+ theme(legend.position="top")+guides(color=guide_legend(nrow=2,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p2 <- DimPlot(epi.imm,group.by = 'Disease_Identity.Sending',raster = F,shuffle = T,pt.size = 1,cols = col.pal$Disease)+NoAxes()+ggtitle('Disease Identity')+ theme(legend.position="top")+guides(color=guide_legend(nrow=2,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p3 <- DimPlot(epi.imm,group.by = 'Manuscript_Identity.Sending',raster = F,pt.size = 1,cols = col.pal$Type)+NoAxes()+ggtitle('Sending Cell Type')+ theme(legend.position="top")+guides(color=guide_legend(nrow=3,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p4 <- DimPlot(epi.imm,group.by = 'Manuscript_Identity.Receiving',raster = F,pt.size = 1,cols = col.pal$Type)+NoAxes()+ggtitle('Receiving Cell Type')+ theme(legend.position="top")+guides(color=guide_legend(nrow=4,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))+theme(legend.text = element_text(size=8))
p5 <- FeaturePlot(epi.imm,'IL11—IL6ST',order = T,pt.size = 0.5,max.cutoff = 30,raster = F)+NoAxes()+ theme(legend.position="top")+theme(plot.title=element_text(hjust=0))
p6 <- FeaturePlot(epi.imm,'CCL2—CCR1',order = T,pt.size = 0.5,max.cutoff = 30,raster = F)+NoAxes()+ theme(legend.position="top")+theme(plot.title=element_text(hjust=0))
cowplot::plot_grid(p1,p2,p3,p4,p5,p6,nrow = 1,align = T)
Do we like this? Does it align well with other work? Suggestions for improvement?